###############################################################################-
# Created By: Pietryka
# Creation Date:  2017-05-12
# Purpose: create dyadic data ("../Data/Derived/dyads_df.rds")
# Contact: mpietryka@fsu.edu
###############################################################################-


# 1. LOAD DATA AND PACKAGES   ----------------



## 1A. LOAD PACKAGES  ------------

library(tidyverse)   # DATA CLEANING FUNCTIONS
library(stringr)     # STRING FUNCTIONS
library(tidytext)    # TEXT ANALYSIS FUNCTIONS


## 1B. DEFINE FUNCTIONS ----------------

# FUNCTION TO MAKE DUMMY VARIABLES
# Converts a logical test into an integer indicator: 1L where `test` is TRUE,
# 0L where it is FALSE, and NA where it is NA. Non-logical input is coerced
# to logical first. The result is always an integer vector -- including for
# empty or scalar-NA input, where ifelse() would hand back a logical -- and
# it keeps the attributes (e.g., names) of `test`.
make_dummy <- function(test) {
  flag <- test
  if (!is.logical(flag)) {
    storage.mode(flag) <- "logical"
  }
  # CHANGING THE STORAGE MODE (RATHER THAN as.integer()) PRESERVES ATTRIBUTES
  storage.mode(flag) <- "integer"
  flag
}


## 1C. LOAD DATA ----------------


# This data file contains the original documents that we obtained from the
# NBER/Maryland State Constitutions Project
# (http://www.stateconstitutions.umd.edu/index.aspx). We then prepared the text
# following standard text-analysis procedures (e.g., Silge and Robinson 2017),
# transforming all text to lowercase, removing punctuation, removing stop words
# defined by the SMART information retrieval system, and then stemming the
# remaining words using the Snowball algorithm (http://snowballstem.org/).

# DOCUMENT-LEVEL DATA (THE PREPARED CONSTITUTION TEXTS)
doc_df <- readr::read_rds("../Data/Derived/doc_df.rds")

# STATE-PAIR DATA: THE DISTANCE BETWEEN THE TWO STATES IN EACH PAIR AND
# WHETHER THE PAIR SHARES A BORDER
border_dyads <- readr::read_rds("../Data/Derived/border_dyads.rds")



# 2. CREATE DYADIC DATA  ------------------------------

# Build one row per ordered pair of documents (`to`, `from`), attach the
# document-level columns for each side of the pair, derive dyad-level
# covariates, and merge in the border/distance measures.
dyads_df <- doc_df  %>%
  # ALL COMBINATIONS OF IDs (INCLUDING PAIRS WHERE to == from)
  expand(to = document_id, from = document_id)  %>%
  # MERGE WITH DOCUMENT LEVEL DATA
  # The second join supplies the suffixes: columns carried over from the
  # first join become `*_to`, the newly joined ones become `*_from`.
  full_join(doc_df, by = c("to" = "document_id"))  %>%
  full_join(doc_df, by = c("from" = "document_id"),
            suffix = c("_to", "_from"))  %>%
  # REMOVE TEXT
  select(-contains("text"))  %>%
  # COVARIATES
  mutate(
    to_after_from = date_to > date_from,
    same_state =  make_dummy(state_full_to == state_full_from),
    south_to = make_dummy(in_south_to == TRUE),
    south_from = make_dummy(in_south_from == TRUE),
    both_south = make_dummy(south_to + south_from == 2L),
    # DAYS ELAPSED FROM THE `from` DOCUMENT TO THE `to` DOCUMENT
    # as.numeric(units = "days") converts whatever unit the subtraction
    # produced; as.difftime() returns an existing difftime unchanged and
    # ignores its `units` argument, so it only yields days for Date columns.
    time_diff = as.numeric(date_to - date_from, units = "days"),
    us_from =  make_dummy(state_code_from == "USA"),
    # DIFFERENCE IN JTS MEASURE OF PARTISANSHIP
    # 1 = same sign, -1 = opposite signs, 0 = `to` score is zero or no other
    # condition holds (e.g., a missing score or a zero `from` score)
    party_same = case_when(
      party3_to == 0 ~ 0L,
      party3_to == party3_from  ~ 1L,
      party3_to * party3_from > 0 ~ 1L,
      party3_to * party3_from < 0 ~ -1L,
      TRUE ~ 0L
      ),
    party_diff = make_dummy(party_same == -1L))  %>%
  # MERGE WITH BORDER DATA
  left_join(
    border_dyads,
    by = c("state_full_to" = "a", "state_full_from" = "b")
    )  %>%
  # DYADS INVOLVING THE USA DOCUMENT GET 0 FOR BOTH GEOGRAPHIC MEASURES
  mutate(
    share_border = ifelse(
      state_code_to == "USA" | state_code_from == "USA", 0L, share_border
      ),
    distance = ifelse(
      state_code_to == "USA" | state_code_from == "USA", 0, distance
      )
    )  %>%
  # STANDARDIZE DISTANCE WITHIN EACH `to` DOCUMENT, SEPARATELY BY WHETHER IT
  # IS DATED AFTER THE `from` DOCUMENT
  # NOTE(review): the 0 placeholders for USA dyads enter the group mean and
  # sd below -- confirm that is intended.
  group_by(to, to_after_from)  %>%
  mutate(
    distance_rel = (distance - mean(distance, na.rm = TRUE)) /
      sd(distance, na.rm = TRUE),
    # USA AS `to`: FIXED AT 0; USA AS `from`: SET TO THE GROUP MINIMUM
    distance_rel = ifelse(state_code_to == "USA", 0, distance_rel),
    # NOTE(review): min(..., na.rm = TRUE) warns and returns Inf for a group
    # with no non-missing distance_rel -- check whether such groups occur.
    distance_rel = ifelse(
      state_code_from == "USA", min(distance_rel, na.rm = TRUE), distance_rel
      )
    )  %>%
  ungroup()




# 3. SAVE -----------

# RECORD WHICH SCRIPT PRODUCED THE DATA, THEN WRITE IT TO DISK
attr(dyads_df, "source") <- "Created in 'DataClean/SC-1- Dyadic Data.R'"
# The output path is passed by position: readr 1.4.0 renamed write_rds()'s
# `path` argument to `file`, and the positional form works under either name.
write_rds(dyads_df, "../Data/Derived/dyads_df.rds")

# DISPLAY VERSION NUMBERS FOR R & PACKAGES IN USE
# (explicit print() so the output also appears when the script is source()d)
print(sessionInfo())
